---
title: "Descriptive statistics"
author: "Dai Yamao"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output:
  html_document: default
  pdf_document: default
---

```{r setup, include=FALSE}
# Echo chunk source in the rendered output and use a wide default figure size.
# The workspace is intentionally NOT cleared with rm(list = ls()): when the
# document is rendered into an existing environment that call deletes the
# caller's objects, and in a fresh rendering session there is nothing to clear.
knitr::opts_chunk$set(echo = TRUE, fig.width = 10, fig.height = 5)

require(memisc)
require(stringi)
library(stargazer)
library(coefplot)
library(interplot)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(stargazer)
library(stringr)
library(tidyverse)
library(ggplot2)
library(psych)
library(margins)
library(dplyr)
library(modelsummary)
library(makedummies)
library(ggeffects)
library(estimatr)
library(ggpubr)
library(BalanceR)
library(summarytools)
library(tidyverse)
library(ggplot2)
library(emmeans)
library(gridExtra)
library(patchwork)
library(gtsummary)

# Load the raw data set.
dat <- read_csv("data/Iraq_election2025.csv")

# Derive the demographic variables from the raw D* items.
dat <- dat |>
  mutate(
    # D1 is coded 1/2; any other value becomes NA in the factor.
    Gender = factor(
      as.numeric(D1),
      levels = c(1, 2),
      labels = c("Male", "Female")
    ),
    # Coerced like the other D* items so that a text-typed D2 is converted
    # rather than aborting the pipeline on `D2 + 17`.
    # NOTE(review): the +17 offset presumably maps answer code 1 to age 18 --
    # confirm against the codebook.
    Age = as.numeric(D2) + 17,
    Education = as.numeric(D3),
    Income = as.numeric(D5),
    # Codes outside 1-5 match no branch and therefore become NA.
    Sect = case_when(
      D6 == 1 ~ "Sunni",
      D6 == 2 ~ "Shia",
      D6 == 3 ~ "Christian",
      D6 == 4 ~ "Kurd",
      D6 == 5 ~ "Other"
    )
  )

# Analysis sample: drop respondents whose Gender or Sect is missing
# (including values that could not be mapped to a label above).
dat_clean <- dat |>
  filter(
    !is.na(Gender),
    !is.na(Sect)
  )

```

# Plot gender, age, and sect
```{r}
# Draw a bar chart showing the percentage share of each category of `var`,
# with the percentage printed above every bar.
#
# data:  data frame that contains the column named by `var`.
# var:   name of a categorical column, given as a string.
# label: text used for both the plot title and the x-axis title
#        (defaults to the column name).
# Returns a ggplot object.
plot_share_bar <- function(data, var, label = var) {
  ggplot(data, aes(x = .data[[var]])) +
    # Bar height = category count as a percentage of all counted rows.
    geom_bar(aes(y = after_stat(count / sum(count) * 100))) +
    # Same statistic again so the text sits on top of the matching bar.
    geom_text(
      stat = "count",
      aes(
        y = after_stat(count / sum(count) * 100),
        label = after_stat(sprintf("%.1f%%", count / sum(count) * 100))
      ),
      vjust = -0.3
    ) +
    labs(
      title = label,
      x = label,
      y = "Percentage"
    ) +
    theme_minimal()
}

# Share of respondents by gender.
p_gender <- plot_share_bar(dat_clean, "Gender")

# Age distribution in 5-year bins (raw counts, not percentages).
p_age <- ggplot(dat_clean, aes(x = Age)) +
  geom_histogram(binwidth = 5) +
  labs(
    title = "Age",
    x = "Age",
    y = "Count"
  ) +
  theme_minimal()

# Share of respondents by sect; x labels are tilted so they do not overlap.
p_sect <- plot_share_bar(dat_clean, "Sect") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Place the three panels side by side (patchwork `|` operator).
p_gender | p_age | p_sect

```

# Descriptive statistics
```{r}
# Keep only the demographic variables that are summarised below.
dat_desc <- select(
  dat_clean,
  all_of(c("Gender", "Age", "Education", "Income"))
)

# Full psych summary, then print only sample size, mean, spread and range.
# NOTE(review): Gender is a factor; psych::describe is expected to summarise
# it through its numeric codes and flag the row with "*" -- confirm in the
# rendered table.
desc_table <- psych::describe(dat_desc)
desc_table[, c("n", "mean", "sd", "min", "max")]


```
